#!/usr/bin/perl
use warnings;
use strict;
use URI;
use DB_File;

# Report how many seed URLs are present as keys in a digests database.
#
# Arguments (both required):
#   seeds_file      - text file read line by line, one URL per line
#   digests_db_file - DB_File hash database; each URL is looked up in it
#                     by its lowercased canonical form
#
# Output: "Recall: <ratio> (<hits>/<total>)" on STDERR.
# Exits with status 1 and a usage message when the argument count is wrong;
# dies with the file name and OS error when either file cannot be opened.
if (@ARGV != 2) {
    print STDERR "Usage: $0 [seeds_file] [digests_db_file]\n";
    exit(1);
}

my ($seeds_file, $digests_db_file) = @ARGV;

# Three-argument open with a lexical handle: the file name is never parsed
# for mode characters (">", "|", leading/trailing spaces), unlike 2-arg open.
open my $seeds_fh, '<', $seeds_file
    or die "Cannot open seeds file '$seeds_file': $!";

# Open the database read-only. DB_File's default flags are O_CREAT|O_RDWR,
# which would silently create an empty database for a mistyped path and
# then report a recall of 0. (DB_File re-exports the Fcntl O_* constants.)
tie my %digests, 'DB_File', $digests_db_file, O_RDONLY, 0644, $DB_HASH
    or die "Cannot open digests database '$digests_db_file': $!";

my ($total, $count) = (0, 0);
while (my $line = <$seeds_fh>) {
    $total++;
    chomp $line;

    # Normalise the URL the same way on every lookup: URI canonical form,
    # then the whole string lowercased.
    # NOTE(review): presumably the database keys were produced with the
    # same normalisation -- confirm against whatever writes the database.
    my $canonical_url = lc URI->new($line)->canonical->as_string;

    $count++ if exists $digests{$canonical_url};
}

untie %digests;
close $seeds_fh;

# An empty seeds file has no meaningful recall; report 0 rather than dying
# with "Illegal division by zero".
my $recall = $total ? $count / $total : 0;
printf STDERR "Recall: %.2f (%d/%d)\n", $recall, $count, $total;
